# ----------------------------------------
#
# Replication files
# Catalinac, Amy (2025)
# Dominance Through Division: Group-based Clientelism in Japan
# 
#-----------------------------------------







#-----------------------------------------
# CHAPTER 5
#
# #-----------------------------------------








#-----------------------------------------
# PREPARATION

# Directory holding the replication data on the author's machine. Only switch
# to it when it exists, so that the script can also be run on another machine
# from whichever folder contains the data file.
data_dir <- "C:/Users/Amy Catalinac/Dropbox/TARGETING/replication2018/ANALYSIS OF MASTER DATA"
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  message("Data directory not found; reading data from ", getwd())
}

options(max.print=1000000)

# read in master data, composed of all non-split municipalities.
# Fail early with a clear message if the file is not in the working directory.
master_file <- "Master_plus_Snow_Turn_Trans_Dis4.rds"
if (!file.exists(master_file)) {
  stop("Cannot find ", master_file, " in ", getwd(), call. = FALSE)
}
dat <- readRDS(master_file)

# subset to HOR election years: keep rows with a non-missing HOR district code
elec.dat <- dat[!is.na(dat$hor_electoral_district),]

#install.packages("dplyr")
#install.packages("readstata13")
#install.packages("stargazer")
#install.packages("lmtest")
#install.packages("plm")
#install.packages("clubSandwich")
#install.packages("apsrtable")
#install.packages("multiwayvcov")
#install.packages("car")
#install.packages("ggplot2")

library(dplyr)
library(readstata13)
library(stargazer)
library(lmtest)
library(plm)
library(clubSandwich)
library(multiwayvcov)
library(car)
library(ggplot2)










# -------------------------------
# TABLE 5.1
#
# CODING ELECTORAL DISTRICTS AS TO WHETHER TOURNAMENT IS POSSIBLE, 1980-2014
#
#---------------------------------

# Below, I subset master data to a specific HOR election.  For each election,
# I calculate:

# (a) 
# Number of electoral districts that are made up of one municipality.

# (b)
# Number of electoral districts that consist solely of split municipalities,
# or have a relatively large share of their voting population residing in a split municipality.

# (c)
# Number of electoral districts that are comprised solely of wards of ordinance-designated cities,
# which do not receive NTD.

# For reference, ordinance-designated cities are:
# Since 1980: Kyoto, Osaka, Yokohama, Kobe, Nagoya, Sapporo, Kawasaki, Fukuoka, 
# Hiroshima, and Kitakyushu cities.
# In 1989, Sendai City was added.
# In 1992, Chiba City was added.
# In 2003, Saitama City was added.
# In 2005, Shizuoka City was added.
# In 2006, Sakai City was added.
# In 2007, Niigata City and Hamamatsu City were added.
# In 2009, Okayama City was added.
# In 2010, Sagamihara City was added.
# In 2012, Kumamoto City was added.


# Note that I rely heavily on the base data (out_final_edit.rds, viewable as an Excel file
# "out_final_edit.xls") to code these districts.  
# The base data contains municipalities that are split across electoral districts, 
# whereas the master data used in the book ("Master_plus_Snow_Turn_Trans_Dis4.rds") does not.







# ---------------------------------------------------
#
# 1980
# 
# ---------------------------------------------------

# descriptive statistics from JHRED

# Total number of electoral districts: 130
# TOTAL NUMBER OF ELECTED MPS: 511
# TOTAL NUMBER OF LDPI MPS: 293
# TOTAL NUMBER OF LDPI LOSING CANDIDATES: 42



# ---------------------------------------------------
# a) Electoral districts comprised of a single municipality:

# Total number of electoral districts in data in 1980
pre.1980 <- elec.dat[which(elec.dat$year == 1980), , drop = FALSE]

# Number of distinct districts present in the 1980 rows
length(unique(pre.1980$district_year))
length(unique(pre.1980$hor_electoral_district))
# 129

# Electoral district 4604 (Amami Islands) is not in the data.
# It was merged into Kagoshima 1 in 1993.
# This missing district is tournament-possible.

# Count of rows whose district is made up of exactly one municipality:
sum(pre.1980$totmun_in_electoral_district==1)
# 0

# Vector that will hold the codes of all electoral districts in the 1980
# election that were tournament-impossible (nothing to add at this step):

no_t_1980 <- NULL






# ---------------------------------------------------
# (b)
# Electoral districts with municipalities spanning more than one district.

# Eyeball base data, which has split_mun_jed==1 (this is out of master data).

# In base data, municipalities spanning more than one district appear in the data 
# n times, where n is number of electoral districts they are in.  These municipalities 
# are coded split_mun_jed==1.  
# Note that for these municipalities, mun_population is the population in the municipality 
# as a whole, whereas mun_voting_pop is specific to that section of the electoral district
# the municipality is in.
# There are 3 split municipalities, which span 6 electoral districts.

# Average number of voters in an electoral district:
# NOTE(review): this averages the district-level voter count over the rows of
# pre.1980, which appear to be municipalities, so districts with more
# municipalities weigh more -- confirm this is the intended average.
mean(pre.1980$voting_pop_in_electoral_district)
# 608878.7

# Rule used for split municipalities: when the split municipality is small
# (its mun_voting_pop is below 10% of the average number of voters in an
# electoral district), a tournament is still possible in its district, so that
# district is NOT added to the tournament-impossible list.

# Result: 3 of the 6 districts (701, 1201, 3301) have large shares of voters
# living in a split municipality.

# Record those three districts:

no_t_1980 <- append(no_t_1980, c("1980_701", "1980_1201", "1980_3301"))

length(no_t_1980)












# ---------------------------------------------------
# (c) Electoral districts composed solely of municipalities in ordinance-designated cities. 

# To do this, let us identify municipalities in ordinance-designated cities, delete them from
# the data frame, and count how many electoral districts this eliminates.

# These municipalities can be identified with two conditions satisfied:
# mun_ceif==NA *and* mun_ngaid == NA.
# They do not receive NTD and fiscal strength information is not produced for them.
# Eyeballing data, you can see that municipalities satisfying these conditions are in ODC.

# Let us first eliminate the small number of municipalities from the 1980 election for which we have
# voting data (from JED-M), but no NEEDs data (due to our inability to match them to a NEEDs observation).
# If we don't eliminate them, the below code will count them as municipalities in ordinance-designated cities:

nrow(pre.1980)
sum(is.na(pre.1980$muncode_num)) # 70

# Keep only municipalities whose muncode_num is populated (matched to NEEDs data):
pre.1980.new <- pre.1980[!is.na(pre.1980$muncode_num), , drop = FALSE]

nrow(pre.1980.new)
# 3287
length(unique(pre.1980.new$hor_electoral_district))
# 129 districts

# Keep rows where at least one of mun_ceif / mun_ngaid is present, i.e. drop
# rows where both are NA:
pre.1980.nos <- pre.1980.new[
  !is.na(pre.1980.new$mun_ceif) | !is.na(pre.1980.new$mun_ngaid), , drop = FALSE]
nrow(pre.1980.new) - nrow(pre.1980.nos)
# eliminated 102 municipalities

# Districts present in pre.1980.new but absent from pre.1980.nos lost every
# one of their municipalities in the step above; these are the districts
# comprised solely of municipalities in ordinance-designated cities:

seirei.dists <- pre.1980.new[
  !(pre.1980.new$hor_electoral_district %in% pre.1980.nos$hor_electoral_district), , drop = FALSE]
seirei.dists.only <- unique(seirei.dists$hor_electoral_district)
length(seirei.dists.only)
# 9 districts

# Eyeballing these districts:
seirei.dists.only
# 1401 1404 2301 2306 2601 2702 2701 2706 2801
seirei.dists.only2 <- paste0("1980_", seirei.dists.only)

# Append them to the vector of tournament-impossible districts:
no_t_1980 <- c(no_t_1980, seirei.dists.only2)

# Drop any duplicated codes, printing the length before and after
length(no_t_1980)
no_t_1980 <- unique(no_t_1980)
length(no_t_1980)
# 12











# ---------------------------------------------------
#
# 1983
# 
# ---------------------------------------------------

# descriptive statistics from JHRED
# Total number of electoral districts: 130
# TOTAL NUMBER OF ELECTED MPS: 511
# TOTAL NUMBER OF LDPI MPS: 261
# TOTAL NUMBER OF LDPI LOSING CANDIDATES: 120





# ---------------------------------------------------
# a) Electoral districts comprised of a single municipality:

# Total number of electoral districts in data in 1983
pre.1983 <- elec.dat[which(elec.dat$year == 1983), , drop = FALSE]
length(unique(pre.1983$district_year))
length(unique(pre.1983$hor_electoral_district))
# 129

# As in 1980, the Amami Islands district is missing.

# Count of rows whose district is made up of exactly one municipality:
sum(pre.1983$totmun_in_electoral_district==1)
# 0
# No districts comprise one municipality

# Empty starting point for the 1983 tournament-impossible codes
no_t_1983 <- NULL





# ---------------------------------------------------
# (b) # Electoral districts with municipalities spanning more than one district.

# There are 3 split municipalities, which span 6 electoral districts.

# what's the average number of voters in an electoral district?
# Mean of voting_pop_in_electoral_district over the rows of pre.1983
mean(pre.1983$voting_pop_in_electoral_district)
# 634558.1

# Same as in 1980 election: districts 701, 1201 and 3301 are coded as
# tournament-impossible.
# Append to no_t_1983 instead of overwriting it, so that any district recorded
# in step (a) is kept (this mirrors how the 1980 and 1993 sections build their
# vectors).

no_t_1983 <- c(no_t_1983, "1983_701", "1983_1201", "1983_3301")







# ---------------------------------------------------
# (c) Electoral districts composed solely of municipalities in ordinance-designated cities. 

nrow(pre.1983)
sum(is.na(pre.1983$muncode_num)) #63

# Keep only municipalities for which muncode_num is populated
pre.1983.new <- pre.1983[!is.na(pre.1983$muncode_num), , drop = FALSE]
nrow(pre.1983.new)
# 3287
length(unique(pre.1983.new$hor_electoral_district))
# 129 districts

# Keep rows where at least one of mun_ceif / mun_ngaid is present, i.e. drop
# rows where both are NA:
pre.1983.nos <- pre.1983.new[
  !is.na(pre.1983.new$mun_ceif) | !is.na(pre.1983.new$mun_ngaid), , drop = FALSE]
nrow(pre.1983.new) - nrow(pre.1983.nos)
# got rid of 108

# Districts present in pre.1983.new but absent from pre.1983.nos are the
# districts comprised solely of municipalities in ordinance-designated cities:

seirei.dists <- pre.1983.new[
  !(pre.1983.new$hor_electoral_district %in% pre.1983.nos$hor_electoral_district), , drop = FALSE]
seirei.dists.only <- unique(seirei.dists$hor_electoral_district)
length(seirei.dists.only)
# 9 districts
seirei.dists.only

seirei.dists.only2 <- paste0("1983_", seirei.dists.only)

# Length before and after appending the ordinance-designated-city districts
length(no_t_1983)
no_t_1983 <- c(no_t_1983, seirei.dists.only2)
length(no_t_1983)








# ---------------------------------------------------
#
# 1986
# 
# ---------------------------------------------------

# descriptive stats from JHRED

# Total number of electoral districts
# 130 (from RS).
# TOTAL NUMBER OF ELECTED MPS: 512
# TOTAL NUMBER OF LDPI MPS: 306
# TOTAL NUMBER OF LDPI LOSING CANDIDATES: 81




# ---------------------------------------------------
# a) Electoral districts comprised of a single municipality:

pre.1986 <- elec.dat[which(elec.dat$year == 1986), , drop = FALSE]
length(unique(pre.1986$district_year))
length(unique(pre.1986$hor_electoral_district))
# 129

# Same as above

# Count of rows whose district is made up of exactly one municipality:
sum(pre.1986$totmun_in_electoral_district==1)
# 0
# No districts comprise one municipality

# Empty starting point for the 1986 tournament-impossible codes
no_t_1986 <- NULL




# ---------------------------------------------------
# (b) Electoral districts with municipalities spanning more than one district.

# what's the average number of voters in an electoral district?
# Mean of voting_pop_in_electoral_district over the rows of pre.1986
mean(pre.1986$voting_pop_in_electoral_district)
# 649255.5

# Same as above

# In total, 3 of 6 districts (701, 1201, 3301) with large shares of voters who live
# in a municipality whose borders span another district -- cannot do tournament.
# Append to no_t_1986 instead of overwriting it, so that any district recorded
# in step (a) is kept.

no_t_1986 <- c(no_t_1986, "1986_701", "1986_1201", "1986_3301")





# ---------------------------------------------------
# (c) Electoral districts composed solely of municipalities in ordinance-designated cities. 

nrow(pre.1986)
sum(is.na(pre.1986$muncode_num))
# 70

# Keep only municipalities for which muncode_num is populated
pre.1986.new <- pre.1986[!is.na(pre.1986$muncode_num), , drop = FALSE]
nrow(pre.1986.new)
# 3287
length(unique(pre.1986.new$hor_electoral_district))
# 129 districts

# Keep rows where at least one of mun_ceif / mun_ngaid is present, i.e. drop
# rows where both are NA:
pre.1986.nos <- pre.1986.new[
  !is.na(pre.1986.new$mun_ceif) | !is.na(pre.1986.new$mun_ngaid), , drop = FALSE]
nrow(pre.1986.new) - nrow(pre.1986.nos)
# got rid of 109

# Districts present in pre.1986.new but absent from pre.1986.nos are the
# districts comprised solely of municipalities in ordinance-designated cities:

seirei.dists <- pre.1986.new[
  !(pre.1986.new$hor_electoral_district %in% pre.1986.nos$hor_electoral_district), , drop = FALSE]
seirei.dists.only <- unique(seirei.dists$hor_electoral_district)
length(seirei.dists.only)
# 9 districts
seirei.dists.only

seirei.dists.only2 <- paste0("1986_", seirei.dists.only)

length(no_t_1986)
# 3

no_t_1986 <- c(no_t_1986, seirei.dists.only2)

# Drop any duplicated codes, printing the length before and after
length(no_t_1986)
no_t_1986 <- unique(no_t_1986)
length(no_t_1986)















# ---------------------------------------------------
#
# 1990 (written as 1989 because data set is in fiscal year)
# 
# ---------------------------------------------------

# descriptive stats from JHRED:

# Total number of electoral districts
# 130 (from RS).
# TOTAL NUMBER OF ELECTED MPS, 1990: 512
# TOTAL NUMBER OF LDPI MPS, 1990: 290
# TOTAL NUMBER OF LDPI LOSING CANDIDATES: 135


# ---------------------------------------------------
# a) Electoral districts comprised of a single municipality:

# Total number of electoral districts in data in 1989
pre.1989 <- elec.dat[which(elec.dat$year == 1989), , drop = FALSE]
length(unique(pre.1989$district_year))
length(unique(pre.1989$hor_electoral_district))
# 129

# same as above

# Count of rows whose district is made up of exactly one municipality:
sum(pre.1989$totmun_in_electoral_district==1)
# 0
# No districts comprise one municipality

# Empty starting point for the 1989 tournament-impossible codes
no_t_1989 <- NULL







# ---------------------------------------------------
# (b) Electoral districts with municipalities spanning more than one district.

# what's the average number of voters in an electoral district?
# Mean of voting_pop_in_electoral_district over the rows of pre.1989
mean(pre.1989$voting_pop_in_electoral_district)
# 675998.2

# This time as well:
# In total, 3 of 6 districts (701, 1201, 3301) with large shares of voters who live
# in a municipality whose borders span another district -- cannot do tournament.
# Append to no_t_1989 instead of overwriting it, so that any district recorded
# in step (a) is kept.

no_t_1989 <- c(no_t_1989, "1989_701", "1989_1201", "1989_3301")







# ---------------------------------------------------
# c) Electoral districts composed solely of municipalities in ordinance-designated cities. 

nrow(pre.1989)
sum(is.na(pre.1989$muncode_num))
# Keep only municipalities for which muncode_num is populated
pre.1989.new <- pre.1989[!is.na(pre.1989$muncode_num), , drop = FALSE]
nrow(pre.1989.new)
length(unique(pre.1989.new$hor_electoral_district))

# Keep rows where at least one of mun_ceif / mun_ngaid is present, i.e. drop
# rows where both are NA:
pre.1989.nos <- pre.1989.new[
  !is.na(pre.1989.new$mun_ceif) | !is.na(pre.1989.new$mun_ngaid), , drop = FALSE]
nrow(pre.1989.new) - nrow(pre.1989.nos)
# got rid of 111

# Districts present in pre.1989.new but absent from pre.1989.nos are the
# districts comprised solely of municipalities in ordinance-designated cities:

seirei.dists <- pre.1989.new[
  !(pre.1989.new$hor_electoral_district %in% pre.1989.nos$hor_electoral_district), , drop = FALSE]
seirei.dists.only <- unique(seirei.dists$hor_electoral_district)
length(seirei.dists.only)
# 9 districts
seirei.dists.only

seirei.dists.only2 <- paste0("1989_", seirei.dists.only)

length(no_t_1989)
# 3

no_t_1989 <- c(no_t_1989, seirei.dists.only2)

# Drop any duplicated codes, printing the length before and after
length(no_t_1989)
no_t_1989 <- unique(no_t_1989)
length(no_t_1989)







# ---------------------------------------------------
#
# 1993
# 
# ---------------------------------------------------

# descriptive statistics:

# Total number of electoral districts
# 129 (from RS).
# TOTAL NUMBER OF ELECTED MPS: 511
# TOTAL NUMBER OF LDPI MPS: 237
# TOTAL NUMBER OF LDPI LOSING CANDIDATES: 87




# ---------------------------------------------------
# a) Electoral districts comprised of a single municipality:

# Total number of electoral districts in data in 1993
pre.1993 <- elec.dat[which(elec.dat$year == 1993), , drop = FALSE]
length(unique(pre.1993$district_year))
length(unique(pre.1993$hor_electoral_district))
# 129

# None missing as Amami islands merged into Kagoshima 1.

# Count of rows whose district is made up of exactly one municipality:
sum(pre.1993$totmun_in_electoral_district==1)
# 0
# No districts comprise one municipality

# Empty starting point for the 1993 tournament-impossible codes
no_t_1993 <- NULL







# ---------------------------------------------------
# (b) Electoral districts with municipalities spanning more than one district.

# as before:
# 6 electoral districts with municipalities spanning more than one district.
# 3 municipalities, each of which are in two districts

# what's the average number of voters in an electoral district?
# Mean of voting_pop_in_electoral_district over the rows of pre.1993
mean(pre.1993$voting_pop_in_electoral_district)
# 707649.2

# Differs from the earlier elections:
# 1201 drops out.  Chiba City became an ordinance-designated city in 1992, so how it
# was measured has changed. Only 701 and 3301 are recorded here:

no_t_1993 <- append(no_t_1993, c("1993_701", "1993_3301"))









# ---------------------------------------------------
# c) Electoral districts composed solely of municipalities in ordinance-designated cities. 

nrow(pre.1993)
sum(is.na(pre.1993$muncode_num)) # 27

# Keep only municipalities for which muncode_num is populated
pre.1993.new <- pre.1993[!is.na(pre.1993$muncode_num), , drop = FALSE]
nrow(pre.1993.new)
length(unique(pre.1993.new$hor_electoral_district))

# Keep rows where at least one of mun_ceif / mun_ngaid is present, i.e. drop
# rows where both are NA:
pre.1993.nos <- pre.1993.new[
  !is.na(pre.1993.new$mun_ceif) | !is.na(pre.1993.new$mun_ngaid), , drop = FALSE]
nrow(pre.1993.new) - nrow(pre.1993.nos)
# got rid of 124

# Districts present in pre.1993.new but absent from pre.1993.nos are the
# districts comprised solely of municipalities in ordinance-designated cities:

seirei.dists <- pre.1993.new[
  !(pre.1993.new$hor_electoral_district %in% pre.1993.nos$hor_electoral_district), , drop = FALSE]
seirei.dists.only <- unique(seirei.dists$hor_electoral_district)
length(seirei.dists.only)
# 9 districts
seirei.dists.only

seirei.dists.only2 <- paste0("1993_", seirei.dists.only)

length(no_t_1993)
# 2

no_t_1993 <- c(no_t_1993, seirei.dists.only2)

# Drop any duplicated codes, printing the length before and after
length(no_t_1993)
# 11
no_t_1993 <- unique(no_t_1993)
length(no_t_1993)









# ---------------------------------------------------
#
# 1996
# 
# ---------------------------------------------------

# descriptive statistics from JHRED:

# Total number of SSD electoral districts 300.
# TOTAL NUMBER OF ELECTED MPS: 500
# TOTAL NUMBER OF LDP CANDS: 381
# TOTAL NUMBER OF LDPI MPS: 245
# TOTAL NUMBER OF LDPI SSD CANDIDATES: 314
# TOTAL NUMBER OF PR-ONLY LDP MPS: 38
# TOTAL NUMBER OF PR-ONLY LDP candidaTES: 67








# ---------------------------------------------------
# a) Electoral districts comprised of a single municipality:

# Total number of electoral districts in data in 1996
pre.1996 <- subset(elec.dat, elec.dat$year==1996)
length(unique(pre.1996$district_year))
length(unique(pre.1996$hor_electoral_district))
# 289
# 11 missing districts.

# All are in base data. Identify the districts in base data that didn't make it into
# master data (meaning, they are comprised solely of split munis):
# 1304, 1306, 1309, 1313, 1316, 2208, 2716, 2717, 3901, 4301, 4401
# (note: 4401 stops being split in 2003).

# These electoral districts are comprised solely of split municipalities, so should be
# tournament-impossible:

missing_dists_1996 <- c("1996_1304",
                        "1996_1306",
                        "1996_1309",
                        "1996_1313",
                        "1996_1316",
                        "1996_2208",
                        "1996_2716",
                        "1996_2717",
                        "1996_3901",
                        "1996_4301",
                        "1996_4401")

length(missing_dists_1996)
# 11 missing districts

# Start the tournament-impossible vector from that list. It is derived from
# missing_dists_1996 rather than typed out a second time, so the two cannot
# drift apart if the list is ever edited.
no_t_1996 <- missing_dists_1996

# Count of rows whose district is made up of exactly one municipality:
sum(pre.1996$totmun_in_electoral_district==1)
# 18 districts comprised of a single muni

# Build "1996_<district>" codes for those single-municipality districts
muni_is_dist <- subset(pre.1996, pre.1996$totmun_in_electoral_district==1)
muni_is_dist2 <- muni_is_dist$hor_electoral_district
muni_is_dist3 <- paste("1996", muni_is_dist2, sep="_")

muni_is_dist3

no_t_1996 <- c(no_t_1996, muni_is_dist3)

length(no_t_1996)
# 29







# ---------------------------------------------------
# (b) Electoral districts with municipalities spanning more than one district.

# Look at base data.
# 15 municipalities; 30 observations; 29 different districts.

# Make a list of those districts here:
# District codes of every split-municipality observation (1206 appears twice)
split_dists2 <- c(1205, 1206, 1206, 1207, 1303, 1304, 1305, 1306, 1310, 1309,
                  1312, 1313, 1316, 1317, 2208, 2209, 2402, 2403, 2716, 2717,
                  3301, 3302, 3901, 3902, 4301, 4302, 4401, 4402, 4601, 4602)
# De-duplicate, then prefix with the election year
split_dists <- paste0("1996_", unique(split_dists2))

# Districts comprised solely of split municipalities are already listed in
# missing_dists_1996.

# Districts that mix split and non-split municipalities:
split_dists3 <- setdiff(split_dists, missing_dists_1996)
length(split_dists3)
# 18 districts.

# Mean of voting_pop_in_electoral_district over the rows of pre.1996
mean(pre.1996$voting_pop_in_electoral_district)
#  306013.7

# If the voting pop of a split municipality in 1996 in base data is less than
# 10% of this, the district is tournament-possible and can stay in the data:
# 3902, 4402

# Remaining districts have large split municipalities and are excluded
new.split_dists <- setdiff(split_dists3, c("1996_3902", "1996_4402"))
length(new.split_dists)
# 16 districts to exclude

length(no_t_1996)
# 29

# Append and drop any duplicated codes in one step
no_t_1996 <- unique(c(no_t_1996, new.split_dists))
length(no_t_1996)










# ---------------------------------------------------
# (c) Electoral districts composed solely of municipalities in ordinance-designated cities. 

nrow(pre.1996)
sum(is.na(pre.1996$muncode_num))
# Keep only municipalities for which muncode_num is populated
pre.1996.new <- pre.1996[!is.na(pre.1996$muncode_num), , drop = FALSE]
length(unique(pre.1996.new$hor_electoral_district))

# Keep rows where at least one of mun_ceif / mun_ngaid is present, i.e. drop
# rows where both are NA:
pre.1996.nos <- pre.1996.new[
  !is.na(pre.1996.new$mun_ceif) | !is.na(pre.1996.new$mun_ngaid), , drop = FALSE]
nrow(pre.1996.new) - nrow(pre.1996.nos)
# got rid of 127 municipalities

# Districts present in pre.1996.new but absent from pre.1996.nos are the
# electoral districts comprised solely of municipalities in
# ordinance-designated cities.
seirei.dists <- pre.1996.new[
  !(pre.1996.new$hor_electoral_district %in% pre.1996.nos$hor_electoral_district), , drop = FALSE]
length(unique(seirei.dists$hor_electoral_district))
# 35 districts

# Viewing these electoral districts:
unique(seirei.dists$hor_electoral_district)
#[1]  401  402  102  103  101 1201 1403 1402 1401 1406 1407 1405 1408 1410 1409 2302 2301 2305 2303 2304 2601 2602 2704 2705
#[25] 2701 2703 2702 2801 2802 2803 3401 4010 4009 4001 4002

# Exclude these electoral districts:
seirei.dists.only <- unique(seirei.dists$hor_electoral_district)
seirei.dists.only2 <- paste0("1996_", seirei.dists.only)
length(seirei.dists.only2)
# 35

length(no_t_1996)
# 45

no_t_1996 <- append(no_t_1996, seirei.dists.only2)

length(no_t_1996)
# 80










# ---------------------------------------------------
#
# 2000
# 
# ---------------------------------------------------

# descriptive stats from JHRED

# Total number of electoral districts 300 (from RS).
# TOTAL NUMBER OF ELECTED MPS: 480
# TOTAL NUMBER OF LDP CANDS: 377
# TOTAL NUMBER OF LDPI MPS: 247
# TOTAL NUMBER OF LDPI SSD CANDIDATES: 311
# TOTAL NUMBER OF PR-ONLY LDP MPS: 49
# TOTAL NUMBER OF PR-ONLY LDP candidates: 66





# ---------------------------------------------------
# (a) Electoral districts comprised of a single municipality:

# Total number of electoral districts in data in 2000
pre.2000 <- subset(elec.dat, elec.dat$year==2000)
length(unique(pre.2000$district_year))
length(unique(pre.2000$hor_electoral_district))
# 289

# 11 missing districts are in base_data:
# 1304, 1306, 1309, 1313, 1316, 2208, 2716, 2717, 3901, 4301, 4401 (stops being split in 2003).
# All comprised solely of split municipalities

missing_dists_2000 <- c("2000_1304",
                        "2000_1306",
                        "2000_1309",
                        "2000_1313",
                        "2000_1316",
                        "2000_2208",
                        "2000_2716",
                        "2000_2717",
                        "2000_3901",
                        "2000_4301",
                        "2000_4401")

length(missing_dists_2000)
# 11

# Start the tournament-impossible vector from that list. It is derived from
# missing_dists_2000 rather than typed out a second time, so the two cannot
# drift apart if the list is ever edited.
no_t_2000 <- missing_dists_2000

length(no_t_2000)
# 11

# Count of rows whose district is made up of exactly one municipality:
sum(pre.2000$totmun_in_electoral_district==1)
# 18 districts

# Build "2000_<district>" codes for those single-municipality districts
muni_is_dist <- subset(pre.2000, pre.2000$totmun_in_electoral_district==1)
muni_is_dist2 <- muni_is_dist$hor_electoral_district
muni_is_dist3 <- paste("2000", muni_is_dist2, sep="_")

no_t_2000 <- c(no_t_2000, muni_is_dist3)
length(no_t_2000)
# 29 districts










# ---------------------------------------------------
# (b) Electoral districts with municipalities spanning more than one district.

# 15 municipalities; 30 observations; 29 different districts. Same as 1996.

# District codes of every split-municipality observation (1206 appears twice)
split_dists2 <- c(1205, 1206, 1206, 1207, 1303, 1304, 1305, 1306, 1310, 1309,
                  1312, 1313, 1316, 1317, 2208, 2209, 2402, 2403, 2716, 2717,
                  3301, 3302, 3901, 3902, 4301, 4302, 4401, 4402, 4601, 4602)
# De-duplicate, then prefix with the election year
split_dists <- paste0("2000_", unique(split_dists2))

# Districts comprised solely of split municipalities are already listed in
# missing_dists_2000.

# Districts that mix split and non-split municipalities:
split_dists3 <- setdiff(split_dists, missing_dists_2000)
length(split_dists3)

# Mean of voting_pop_in_electoral_district over the rows of pre.2000
mean(pre.2000$voting_pop_in_electoral_district)
# 312848.2

# If the voting pop of a split municipality in 2000 in base data is less than
# 10% of this, the district is tournament-possible and can stay in the data:
# 3902, 4402

# Remaining districts have large split municipalities and are excluded
new.split_dists <- setdiff(split_dists3, c("2000_3902", "2000_4402"))
length(new.split_dists)
# 16 districts

length(no_t_2000)
# 29

no_t_2000 <- append(no_t_2000, new.split_dists)
length(no_t_2000)








# ---------------------------------------------------
# (c) Electoral districts composed solely of municipalities in ordinance-designated cities. 

nrow(pre.2000)
sum(is.na(pre.2000$muncode_num))
# Keep only municipalities for which muncode_num is populated
pre.2000.new <- pre.2000[!is.na(pre.2000$muncode_num), , drop = FALSE]
nrow(pre.2000.new)
length(unique(pre.2000.new$hor_electoral_district))

# Keep rows where at least one of mun_ceif / mun_ngaid is present, i.e. drop
# rows where both are NA:
pre.2000.nos <- pre.2000.new[
  !is.na(pre.2000.new$mun_ceif) | !is.na(pre.2000.new$mun_ngaid), , drop = FALSE]
nrow(pre.2000.new) - nrow(pre.2000.nos)
# got rid of 128

# Districts present in pre.2000.new but absent from pre.2000.nos are the
# electoral districts comprised solely of municipalities in
# ordinance-designated cities.
seirei.dists <- pre.2000.new[
  !(pre.2000.new$hor_electoral_district %in% pre.2000.nos$hor_electoral_district), , drop = FALSE]
length(unique(seirei.dists$hor_electoral_district))
# 35 districts

# Viewing them:
unique(seirei.dists$hor_electoral_district)

seirei.dists.only <- unique(seirei.dists$hor_electoral_district)
seirei.dists.only2 <- paste0("2000_", seirei.dists.only)
length(seirei.dists.only2)

length(no_t_2000)
# 45

no_t_2000 <- append(no_t_2000, seirei.dists.only2)
length(no_t_2000)
# 80 districts







# ---------------------------------------------------
#
# 2003
# 
# ---------------------------------------------------

# descriptive stats

# Total number of electoral districts: 300 (from RS).

# TOTAL NUMBER OF ELECTED MPS: 480
# TOTAL NUMBER OF LDPI MPS: 246
# TOTAL NUMBER OF LDP CANDS: 379
# TOTAL NUMBER OF LDPI SSD CANDIDATES: 320
# TOTAL NUMBER OF PR-ONLY LDP MPS: 32
# TOTAL NUMBER OF PR-ONLY LDP candidaTES: 59







# ---------------------------------------------------
# (a) Electoral districts comprised of a single municipality:

# Total number of electoral districts in data in 2003
pre.2003 <- subset(elec.dat, elec.dat$year==2003)
length(unique(pre.2003$district_year))
length(unique(pre.2003$hor_electoral_district))
# 287

# 13 missing districts are:
# 1206 (new), 1304, 1306, 1309, 1313, 1316, 1414 (new), 2201 (new), 2208, 2716, 2717, 3901, 4301.
# 4401 is no longer split.

missing_dists_2003 <- c("2003_1206",
                        "2003_1304",
                        "2003_1306",
                        "2003_1309",
                        "2003_1313",
                        "2003_1316",
                        "2003_1414",
                        "2003_2201",
                        "2003_2208",
                        "2003_2716",
                        "2003_2717",
                        "2003_3901",
                        "2003_4301")

length(missing_dists_2003)

# Start the tournament-impossible vector from that list. It is derived from
# missing_dists_2003 rather than typed out a second time, so the two cannot
# drift apart if the list is ever edited.
no_t_2003 <- missing_dists_2003

length(no_t_2003)
# 13 districts

# Count of rows whose district is made up of exactly one municipality:
sum(pre.2003$totmun_in_electoral_district==1)
# 16 districts

# Build "2003_<district>" codes for those single-municipality districts
muni_is_dist <- subset(pre.2003, pre.2003$totmun_in_electoral_district==1)
muni_is_dist2 <- muni_is_dist$hor_electoral_district
muni_is_dist3 <- paste("2003", muni_is_dist2, sep="_")

no_t_2003 <- c(no_t_2003, muni_is_dist3)

# Total length and number of distinct codes (equal when there are no duplicates)
length(no_t_2003)
length(unique(no_t_2003))








# ---------------------------------------------------
# (b) Electoral districts with municipalities spanning more than one district.

# 17 municipalities; 34 observations; 33 different districts.

# District codes that contain a split municipality in 2003
split_dists2 <- c(1205, 1206, 1207, 1303, 1304, 1305, 1306, 1310, 1309, 1312,
                  1313, 1316, 1317, 1414, 1416, 2201, 2204, 2207, 2208, 2402,
                  2403, 2716, 2717, 3301, 3302, 3501, 3502, 3901, 3902, 4301,
                  4302, 4601, 4602)
# De-duplicate, then prefix with the election year
split_dists <- paste0("2003_", unique(split_dists2))

# Districts comprised solely of split municipalities are already listed in
# missing_dists_2003.

# Districts that mix split and non-split municipalities:
split_dists3 <- setdiff(split_dists, missing_dists_2003)
length(split_dists3)
# 20 districts

# Mean of voting_pop_in_electoral_district over the rows of pre.2003
mean(pre.2003$voting_pop_in_electoral_district)
# 320516.9

# If the voting pop of a split municipality in 2003 in base data is less than
# 10% of this, the district is tournament-possible and can stay in the data.
# Only one district qualifies: 3502

# Remaining districts have large split municipalities and are excluded
new.split_dists <- setdiff(split_dists3, "2003_3502")
length(new.split_dists)
# 19 districts

length(no_t_2003)
# 29
no_t_2003 <- append(no_t_2003, new.split_dists)
length(unique(no_t_2003))
# 48 districts








# ---------------------------------------------------
# (c) Electoral districts composed solely of municipalities in ordinance-designated cities.

# Keep only observations that carry a municipality code.
nrow(pre.2003)
sum(is.na(pre.2003$muncode_num))
pre.2003.new <- subset(pre.2003, !is.na(pre.2003$muncode_num))
nrow(pre.2003.new)
length(unique(pre.2003.new$hor_electoral_district))

# Filter out observations that satisfy these conditions: mun_ceif==NA *and* mun_ngaid == NA
pre.2003.nos <- subset(pre.2003.new, !(is.na(pre.2003.new$mun_ceif) & is.na(pre.2003.new$mun_ngaid)))
nrow(pre.2003.new) - nrow(pre.2003.nos)
# got rid of 137

# Which districts in pre.2003.new did not make it into pre.2003.nos? These are electoral districts
# comprised solely of municipalities in ordinance-designated cities.
seirei.dists <- subset(pre.2003.new, !(pre.2003.new$hor_electoral_district %in% pre.2003.nos$hor_electoral_district))
length(unique(seirei.dists$hor_electoral_district))
# 36 districts

unique(seirei.dists$hor_electoral_district)
#[1]  401  402  102  103  101 1201 1403 1402 1401 1406 1407 1405 1408 1410 1409 2302 2301 2305 2303 2304 2601 2602 2704 2705
#[25] 2701 2703 2702 2801 2802 2803 3401 4010 4009 4001 4002
# NOTE(review): the listing above shows 35 codes while the count comment says 36;
# the pasted listing may be stale -- confirm against a fresh run.

# Prefix the election year so the codes match the format used in no_t_2003.
seirei.dists.only <- unique(seirei.dists$hor_electoral_district)
seirei.dists.only2 <- paste("2003", seirei.dists.only, sep="_")
length(seirei.dists.only2)

length(no_t_2003)
# 48 districts

no_t_2003 <- c(no_t_2003, seirei.dists.only2)
length(no_t_2003)
# 84 entries (count recorded before de-duplication)

# A district can enter this list under more than one criterion. The 2009 and
# 2012 sections drop such repeats before counting; do the same here so that
# length() counts districts rather than entries.
no_t_2003 <- unique(no_t_2003)
length(no_t_2003)
















# ---------------------------------------------------
#
# 2005
# 
# ---------------------------------------------------

# descriptive stats:

# Total number of electoral districts: 300 (from RS).
# TOTAL NUMBER OF ELECTED MPS: 480
# TOTAL NUMBER OF LDP CANDS: 388
# TOTAL NUMBER OF LDPI MPS: 311
# TOTAL NUMBER OF LDPI SSD CANDIDATES: 332
# TOTAL NUMBER OF PR-ONLY LDP MPS: 29
# TOTAL NUMBER OF PR-ONLY LDP CANDIDATES: 56







# ---------------------------------------------------
# (a) Electoral districts comprised of a single municipality:

# Total number of electoral districts in data in 2005
# Restrict the election data to 2005 and count the districts present.
pre.2005 <- subset(elec.dat, year == 2005)
length(unique(pre.2005$district_year))
length(unique(pre.2005$hor_electoral_district))
# 281

# 19 missing districts are:
# 1206, 1304, 1306, 1309, 1313, 1316, 1414, 1501 (new), 1601 (new), 2201, 2208, 2716, 2717,
# 2901 (new), 3801 (new), 3901, 4201 (new), 4301, 4401 (new).

missing_dists_2005 <- c(
  "2005_1206", "2005_1304", "2005_1306", "2005_1309", "2005_1313",
  "2005_1316", "2005_1414", "2005_1501", "2005_1601", "2005_2201",
  "2005_2208", "2005_2716", "2005_2717", "2005_2901", "2005_3801",
  "2005_3901", "2005_4201", "2005_4301", "2005_4401"
)

length(missing_dists_2005)

# The running list of 2005 districts in which a tournament is not possible
# starts out as exactly the missing districts.
no_t_2005 <- missing_dists_2005

length(no_t_2005)
# 19

# How many rows belong to a district made up of exactly one municipality?
sum(pre.2005$totmun_in_electoral_district == 1)
# 13 districts

# Collect those districts and prefix the election year.
muni_is_dist <- subset(pre.2005, totmun_in_electoral_district == 1)
muni_is_dist2 <- muni_is_dist$hor_electoral_district
muni_is_dist3 <- paste0("2005_", muni_is_dist2)

no_t_2005 <- append(no_t_2005, muni_is_dist3)

length(no_t_2005)
# 32 districts










# ---------------------------------------------------
# (b) Electoral districts with municipalities spanning more than one district.

# 51 municipalities; 109 observations; 92 different districts.

# District codes (2005) that contain at least one split municipality.
split_dists2 <- c(201,204,801,802,804,1001,1002,1003,1205,1206,1207,1213,1208,1303,1304,1305,1306,1310,1309,1312,1313,1316,1317,1414,1416,1501,1502,1503,1504,1505,1601,1602,1901,1902,1903,2001,2002,2201,2204,2203,2207,2208,2202,2205,2206,2310,2309,2311,2314,2402,2403,2502,2504,2715,2716,2717,2901,2902,3201,3202,3301,3302,3304,3305,3303,3404,3405,3406,3402,3501,3502,3602,3603,3702,3703,3801,3802,3804,3901,3902,3903,4201,4202,4301,4302,4303,4304,4401,4402,4601,4602,4603)
split_dists <- unique(split_dists2)
split_dists <- paste("2005", split_dists, sep="_")

# Some of these districts will be comprised solely of splits, so
# will already be in missing_dists_2005.

# How many districts consisting of split municipalities *and* non-split municipalities?
split_dists3 <- split_dists[!(split_dists %in% missing_dists_2005)]
length(split_dists3)
# 73 districts

# What's the average number of voters in an electoral district?
mean(pre.2005$voting_pop_in_electoral_district)
# 325387.6

# If the voting pop of a split municipality in 2005 in base data is less than 10% of this,
# then the district is tournament-possible and we can keep it in data:
# Eyeball data -- list of these districts is below.
# NB: some districts have multiple split municipalities, so I summed number of voters
# in split municipalities. If number of voters in split municipalities was less than 10% of
# this, I said it was tournament-possible.

# Get the list of districts with large split municipalities to exclude from data.
# The codes inside c(...) are the tournament-possible districts that are KEPT;
# everything else in split_dists3 is excluded. ("2005_3405" was listed twice in
# the original vector; the repeat had no effect on %in% and has been removed.)
new.split_dists <- split_dists3[! split_dists3 %in% c("2005_204",
                                                      "2005_802",
                                                      "2005_1001",
                                                      "2005_1901",
                                                      "2005_1903",
                                                      "2005_2002",
                                                      "2005_2202",
                                                      "2005_2203",
                                                      "2005_2205",
                                                      "2005_2206",
                                                      "2005_2314",
                                                      "2005_2502",
                                                      "2005_2715",
                                                      "2005_2902",
                                                      "2005_3405",
                                                      "2005_3402",
                                                      "2005_3502",
                                                      "2005_3602",
                                                      "2005_3603",
                                                      "2005_3702",
                                                      "2005_3804",
                                                      "2005_3903",
                                                      "2005_4202",
                                                      "2005_4303",
                                                      "2005_4304",
                                                      "2005_4402",
                                                      "2005_4603")]
length(new.split_dists)
# 46 districts with large split munis

length(no_t_2005)
# 32

no_t_2005 <- c(no_t_2005, new.split_dists)
length(no_t_2005)
# 78 districts







# ---------------------------------------------------
# (c) Electoral districts composed solely of municipalities in ordinance-designated cities.
# Filter out observations that satisfy these conditions: mun_ceif==NA *and* mun_ngaid == NA
# Which districts in pre.2005.new did not make it into pre.2005.nos? These are electoral districts
# comprised solely of municipalities in ordinance-designated cities.

nrow(pre.2005)
sum(is.na(pre.2005$muncode_num))
# NB: this increases in this election because municipalities that ceased to exist due to
# a merger that occurred within fiscal year 2005 are not matched to fiscal and demographic (NEEDs) data
# for fiscal year 2005.
# Thus, we have their elections data, but not their fiscal/demographic data.

# Keep only observations that carry a municipality code.
pre.2005.new <- subset(pre.2005, !is.na(pre.2005$muncode_num))
nrow(pre.2005.new)

length(unique(pre.2005.new$hor_electoral_district))
# 280 (this deletes one more district than master data contains)

# Filter out observations that satisfy these conditions: mun_ceif==NA *and* mun_ngaid == NA
pre.2005.nos <- subset(pre.2005.new, !(is.na(pre.2005.new$mun_ceif) & is.na(pre.2005.new$mun_ngaid)))
nrow(pre.2005.new) - nrow(pre.2005.nos)
# got rid of 138

# Which districts in pre.2005.new did not make it into pre.2005.nos? These are electoral districts
# comprised solely of municipalities in ordinance-designated cities.

seirei.dists <- subset(pre.2005.new, !(pre.2005.new$hor_electoral_district %in% pre.2005.nos$hor_electoral_district))
length(unique(seirei.dists$hor_electoral_district))
# 37 districts
unique(seirei.dists$hor_electoral_district)

# Prefix the election year so the codes match the format used in no_t_2005.
seirei.dists.only <- unique(seirei.dists$hor_electoral_district)
seirei.dists.only2 <- paste("2005", seirei.dists.only, sep="_")
length(seirei.dists.only2)

length(no_t_2005)
# 78 districts

no_t_2005 <- c(no_t_2005, seirei.dists.only2)
length(no_t_2005)
# 115 entries (count recorded before de-duplication)

# A district can enter this list under more than one criterion. The 2009 and
# 2012 sections drop such repeats before counting; do the same here so that
# length() counts districts rather than entries.
no_t_2005 <- unique(no_t_2005)
length(no_t_2005)







# ---------------------------------------------------
#
# 2009
# 
# ---------------------------------------------------

# descriptive stats from JHRED

# Total number of electoral districts: 300 (from RS).

# TOTAL NUMBER OF ELECTED MPS: 480
# TOTAL NUMBER OF LDPI MPS: 123
# TOTAL NUMBER OF LDP CANDS: 355
# TOTAL NUMBER OF LDPI SSD CANDIDATES: 318
# TOTAL NUMBER OF PR-ONLY LDP MPS: 9
# TOTAL NUMBER OF PR-ONLY LDP CANDIDATES: 37








# ---------------------------------------------------
# (a) Electoral districts comprised of a single municipality:

# Total number of electoral districts in data in 2009:
# Restrict the election data to 2009 and count the districts present.
pre.2009 <- subset(elec.dat, year == 2009)
length(unique(pre.2009$district_year))
length(unique(pre.2009$hor_electoral_district))
# 283 districts

# 17 missing districts are: 1206, 1304, 1306, 1309, 1313, 1316, 1414, 1601, 2101 (new), 2811 (new), 2901,
# 3301 (new), 3801, 3901, 4201, 4301, 4401.

missing_dists_2009 <- c(
  "2009_1206", "2009_1304", "2009_1306", "2009_1309", "2009_1313",
  "2009_1316", "2009_1414", "2009_1601", "2009_2101", "2009_2811",
  "2009_2901", "2009_3301", "2009_3801", "2009_3901", "2009_4201",
  "2009_4301", "2009_4401"
)

length(missing_dists_2009)
# 17

# The running list of 2009 districts in which a tournament is not possible
# starts out as exactly the missing districts.
no_t_2009 <- missing_dists_2009

length(no_t_2009)
# 17 districts

# How many rows belong to a district made up of exactly one municipality?
sum(pre.2009$totmun_in_electoral_district == 1)
# 12 districts

# Collect those districts and prefix the election year.
muni_is_dist <- subset(pre.2009, totmun_in_electoral_district == 1)
muni_is_dist2 <- muni_is_dist$hor_electoral_district
muni_is_dist3 <- paste0("2009_", muni_is_dist2)

no_t_2009 <- append(no_t_2009, muni_is_dist3)

# Compare the raw and de-duplicated lengths to check for repeated codes.
length(no_t_2009)
length(unique(no_t_2009))
# 29 districts







# ---------------------------------------------------
# (b) Electoral districts with municipalities spanning more than one district.

# 78 municipalities; 175 observations; 122 different districts.
# Districts with a split muni are below:

# District codes (2009) that contain at least one split municipality.
split_dists2 <- c(201,204,301,302,404,405,406,801,802,807,804,806,
                  901,902,904,1004,1005,1001,1002,1003,1111,1112,1113,1114,1106,
                  1107,1108,1205,1206,1207,1213,1208,1210,1211,1303,1304,1305,1306,
                  1310,1309,1312,1313,1316,1317,1414,1416,1501,1503,1504,1502,1505,
                  1601,1602,1801,1803,1802,1901,1902,1903,2001,2002,2101,2103,2207,
                  2208,2203,2204,2205,2202,2206,2310,2309,2311,2314,2401,2404,2402,2403,
                  2502,2504,2811,2812,2901,2902,3201,3202,3301,3302,3303,3304,3305,3404,
                  3405,3406,3402,3501,3502,3602,3603,3701,3702,3703,3801,3802,3804,3901,
                  3902,3903,4101,4102,4103,4201,4202,4301,4302,4304,4303,4401,4402,4601,4602,4603)

split_dists <- unique(split_dists2)
split_dists <- paste("2009", split_dists, sep="_")
length(split_dists)
# 122 districts

# Some of these districts will be comprised solely of splits, so
# will already be in missing_dists_2009.

# How many districts consisting of split municipalities *and* non-split municipalities?
split_dists3 <- split_dists[!(split_dists %in% missing_dists_2009)]
length(split_dists3)
# 105 districts

# what's the average number of voters in an electoral district?
mean(pre.2009$voting_pop_in_electoral_district)
# 329839.2

# If voting pop of a split municipality in 2009 in base data is less than 10% of this,
# then the district is tournament-possible and we can keep it in data:
# Districts in which the split municipality comprises less than 10% of this are below.
# NB: some districts have multiple split municipalities, so I summed the number of voters
# in split municipalities.  If the number of voters in split municipalities was less than 10% of
# this, I said it was tournament-possible.

# Get the list of districts with large split municipalities to exclude from data.
# The codes inside c(...) are the tournament-possible districts that are KEPT;
# everything else in split_dists3 is excluded. ("2009_4304" was listed twice in
# the original vector; the repeat had no effect on %in% and has been removed.)
new.split_dists <- split_dists3[! split_dists3 %in% c("2009_204",
                                                      "2009_302",
                                                      "2009_405",
                                                      "2009_406",
                                                      "2009_807",
                                                      "2009_806",
                                                      "2009_904",
                                                      "2009_1111",
                                                      "2009_1114",
                                                      "2009_1210",
                                                      "2009_1211",
                                                      "2009_1802",
                                                      "2009_1903",
                                                      "2009_2002",
                                                      "2009_2103",
                                                      "2009_2203",
                                                      "2009_2204",
                                                      "2009_2202",
                                                      "2009_2206",
                                                      "2009_2314",
                                                      "2009_2502",
                                                      "2009_2902",
                                                      "2009_3305",
                                                      "2009_3402",
                                                      "2009_3502",
                                                      "2009_3602",
                                                      "2009_3603",
                                                      "2009_3804",
                                                      "2009_3903",
                                                      "2009_4202",
                                                      "2009_4304",
                                                      "2009_4303",
                                                      "2009_4402")]
length(new.split_dists)
# 72 districts

length(no_t_2009)
# 29 districts

no_t_2009 <- c(no_t_2009, new.split_dists)

length(no_t_2009)
# 101 districts








# ---------------------------------------------------
# (c) Electoral districts composed solely of municipalities in ordinance-designated cities. 

# Keep only observations that carry a municipality code.
nrow(pre.2009)
sum(is.na(pre.2009$muncode_num))
pre.2009.new <- subset(pre.2009, !is.na(muncode_num))
nrow(pre.2009.new)
length(unique(pre.2009.new$hor_electoral_district))

# Keep a row if at least one of mun_ceif / mun_ngaid is recorded, i.e. drop
# rows where both are NA.
pre.2009.nos <- subset(pre.2009.new, !is.na(mun_ceif) | !is.na(mun_ngaid))
nrow(pre.2009.new) - nrow(pre.2009.nos)
# got rid of 157 municipalities

# Districts present in pre.2009.new but with no surviving row in pre.2009.nos
# are the districts comprised solely of municipalities in
# ordinance-designated cities:

seirei.dists <- subset(
  pre.2009.new,
  !(hor_electoral_district %in% pre.2009.nos$hor_electoral_district)
)
length(unique(seirei.dists$hor_electoral_district))
# 43 districts
unique(seirei.dists$hor_electoral_district)

# Prefix the election year so the codes match the format used in no_t_2009.
seirei.dists.only <- unique(seirei.dists$hor_electoral_district)
seirei.dists.only2 <- paste0("2009_", seirei.dists.only)
length(seirei.dists.only2)
# 43 districts

length(no_t_2009)
# 101 districts

no_t_2009 <- append(no_t_2009, seirei.dists.only2)
length(no_t_2009)
# 144 districts

# A district shows up twice in this list when it meets two conditions.
# Technically, there are 43 districts comprised solely of wards in
# ordinance-designated cities, but some of them were already eliminated in (b)
# because they have many voters living in split municipalities, e.g. 1501.

# Remove the repeated entries, keeping the first occurrence of each:
no_t_2009 <- unique(no_t_2009)
length(no_t_2009)
# 142 districts

# NB: Table 5.1 in book has 41 districts under ordinance-designated city districts.
# Technically, it is 43, but two fell under the former category, so have already been eliminated.
# My coding went sequentially, from "solo municipality" districts, to "split municipality" districts,
# to "ODC-districts".










# ---------------------------------------------------
#
# 2012
# 
# ---------------------------------------------------

# descriptive stats from JHRED

# Total number of electoral districts: 300 (from RS).
# TOTAL NUMBER OF ELECTED MPS: 480
# TOTAL NUMBER OF LDPI MPS: 299
# TOTAL NUMBER OF LDP CANDS: 346
# TOTAL NUMBER OF LDPI SSD CANDIDATES: 297
# TOTAL NUMBER OF PR-ONLY LDP MPS: 18
# TOTAL NUMBER OF PR-ONLY LDP CANDIDATES: 49








# ---------------------------------------------------
# (a) Electoral districts comprised of a single municipality:

# Total number of electoral districts in data in 2012
# Restrict the election data to 2012 and count the districts present.
pre.2012 <- subset(elec.dat, year == 2012)
length(unique(pre.2012$district_year))
length(unique(pre.2012$hor_electoral_district))
# 284 districts

# 16 missing districts are:
# 1206, 1304, 1306, 1309, 1313, 1316, 1601, 2101, 2811, 2901, 3301, 3801, 3901, 4201, 4401, 4301

missing_dists_2012 <- c(
  "2012_1206", "2012_1304", "2012_1306", "2012_1309", "2012_1313",
  "2012_1316", "2012_1601", "2012_2101", "2012_2811", "2012_2901",
  "2012_3301", "2012_3801", "2012_3901", "2012_4201", "2012_4301",
  "2012_4401"
)

length(missing_dists_2012)
# 16

# The running list of 2012 districts in which a tournament is not possible
# starts out as exactly the missing districts.
no_t_2012 <- missing_dists_2012

length(no_t_2012)
# 16 districts

# How many rows belong to a district made up of exactly one municipality?
sum(pre.2012$totmun_in_electoral_district == 1)
# 13 districts

# Collect those districts and prefix the election year.
muni_is_dist <- subset(pre.2012, totmun_in_electoral_district == 1)
muni_is_dist2 <- muni_is_dist$hor_electoral_district
muni_is_dist3 <- paste0("2012_", muni_is_dist2)

no_t_2012 <- append(no_t_2012, muni_is_dist3)

length(no_t_2012)
# 29 districts










# ---------------------------------------------------
# (b) Electoral districts with municipalities spanning more than one district.

# 81 municipalities; 189 observations; 124 different districts.

# District codes (2012) that contain at least one split municipality.
split_dists2 <- c(
  201, 204, 301, 302, 404, 405, 406, 801, 802, 807, 804, 806, 901,
  902, 904, 905, 1004, 1005, 1001, 1002, 1003, 1111, 1112, 1113, 1114,
  1106, 1107, 1108, 1205, 1206, 1207, 1213, 1208, 1210, 1211, 1303, 1304, 1305, 1306,
  1310, 1309, 1312, 1313, 1316, 1317, 1414, 1416, 1501, 1503, 1502, 1504, 1505, 1601, 1602,
  1801, 1803, 1802, 1901, 1902, 1903, 2001, 2002, 2101, 2103, 2207, 2208, 2203,
  2204, 2205, 2202, 2206, 2310, 2309, 2311, 2314, 2401, 2404, 2402, 2403, 2502,
  2504, 2811, 2812, 2901, 2902, 3201, 3202, 3301, 3302, 3303, 3304, 3305, 3404,
  3405, 3406, 3402, 3501, 3503, 3502, 3602, 3603, 3701, 3702, 3703, 3801, 3802, 3804,
  3901, 3902, 3903, 4101, 4102, 4103, 4201, 4202, 4303, 4304, 4401, 4402, 4601, 4602, 4603, 4301, 4302
)

# De-duplicate, then prefix the election year.
split_dists <- paste0("2012_", unique(split_dists2))
length(split_dists)
# 124 districts

# Districts made up solely of split municipalities are already listed in
# missing_dists_2012, so remove them here.

# What remains are districts that mix split and non-split municipalities:
split_dists3 <- setdiff(split_dists, missing_dists_2012)
length(split_dists3)
# 108 districts

# Average number of voters in an electoral district:
mean(pre.2012$voting_pop_in_electoral_district)
# 328731.8

# Rule: if the voting population living in split municipalities in 2012 (base
# data) is less than 10% of this average, the district is treated as
# tournament-possible and stays in the data.
# NB: some districts have multiple split municipalities, so the number of
# voters was summed across the split municipalities before applying the rule.

# Districts with large split municipalities, to be excluded from the data.
# The codes passed to setdiff() are the tournament-possible districts that
# are kept.
new.split_dists <- setdiff(
  split_dists3,
  c(
    "2012_204", "2012_302", "2012_405", "2012_406",
    "2012_807", "2012_806", "2012_1111", "2012_1210",
    "2012_1211", "2012_2202", "2012_2204", "2012_2314",
    "2012_2502", "2012_2902", "2012_3402", "2012_3503",
    "2012_3602", "2012_3603", "2012_3804", "2012_3903",
    "2012_4202", "2012_4402"
  )
)

length(new.split_dists)
# 86 districts

length(no_t_2012)
# 29

no_t_2012 <- append(no_t_2012, new.split_dists)
length(no_t_2012)
# 115 districts








# ---------------------------------------------------
# (c) Electoral districts composed solely of municipalities in ordinance-designated cities. 

# Keep only observations that carry a municipality code.
nrow(pre.2012)
sum(is.na(pre.2012$muncode_num))
pre.2012.new <- subset(pre.2012, !is.na(muncode_num))
nrow(pre.2012.new)
length(unique(pre.2012.new$hor_electoral_district))

# Keep a row if at least one of mun_ceif / mun_ngaid is recorded, i.e. drop
# rows where both are NA.
pre.2012.nos <- subset(pre.2012.new, !is.na(mun_ceif) | !is.na(mun_ngaid))
nrow(pre.2012.new) - nrow(pre.2012.nos)
# got rid of 158 observations

# Districts present in pre.2012.new but with no surviving row in pre.2012.nos
# are the districts comprised solely of municipalities in
# ordinance-designated cities:

seirei.dists <- subset(
  pre.2012.new,
  !(hor_electoral_district %in% pre.2012.nos$hor_electoral_district)
)
length(unique(seirei.dists$hor_electoral_district))
# 43 districts

unique(seirei.dists$hor_electoral_district)

# Prefix the election year so the codes match the format used in no_t_2012.
seirei.dists.only <- unique(seirei.dists$hor_electoral_district)
seirei.dists.only2 <- paste0("2012_", seirei.dists.only)
length(seirei.dists.only2)
# 43 districts

length(no_t_2012)
# 115

no_t_2012 <- append(no_t_2012, seirei.dists.only2)

# A district shows up twice in this list when it meets two conditions.
# Technically, there are 43 districts comprised solely of wards in
# ordinance-designated cities, but some of them were already eliminated in (b)
# because they have many voters living in split municipalities, e.g. 1501.

# Remove the repeated entries, keeping the first occurrence of each:
no_t_2012 <- unique(no_t_2012)
length(no_t_2012)
# 155 districts

# NB: Table 5.1 in book has 41 districts under ordinance-designated city districts.
# Technically, it is 43, but two fell under the former category, so have already been eliminated.
# My coding went sequentially, from "solo municipality" districts, to "split municipality" districts,
# to "ODC-districts".























# ---------------------------------------------------
#
# 2014
# 
# ---------------------------------------------------

# Descriptive stats from JHRED.

# Total number of electoral districts: 295

# TOTAL NUMBER OF ELECTED MPS: 475
# TOTAL NUMBER OF LDPI MPS: 296
# TOTAL NUMBER OF LDP CANDS: 363
# TOTAL NUMBER OF LDPI SSD CANDIDATES: 294
# TOTAL NUMBER OF PR-ONLY LDP MPS: 16
# TOTAL NUMBER OF PR-ONLY LDP CANDIDATES: 69












# ---------------------------------------------------
# (a) Electoral districts comprised of a single municipality:

# Total number of electoral districts in data in 2014
# Restrict the election data to 2014 and count the districts present.
pre.2014 <- subset(elec.dat, year == 2014)
length(unique(pre.2014$district_year))
length(unique(pre.2014$hor_electoral_district))
# 279 districts

# 16 missing districts (number of districts was reduced to 295 in 2014)

missing_dists_2014 <- c(
  "2014_1204", "2014_1206", "2014_1304", "2014_1306", "2014_1309",
  "2014_1313", "2014_1316", "2014_1601", "2014_2101", "2014_2901",
  "2014_2811", "2014_4201", "2014_3801", "2014_4401", "2014_3301",
  "2014_4301"
)


length(missing_dists_2014)
# 16 districts

# The running list of 2014 districts in which a tournament is not possible
# starts out as exactly the missing districts.
no_t_2014 <- missing_dists_2014

length(no_t_2014)
# 16 districts

# How many rows belong to a district made up of exactly one municipality?
sum(pre.2014$totmun_in_electoral_district == 1)
# 12 districts

# Collect those districts and prefix the election year.
muni_is_dist <- subset(pre.2014, totmun_in_electoral_district == 1)
muni_is_dist2 <- muni_is_dist$hor_electoral_district
muni_is_dist3 <- paste0("2014_", muni_is_dist2)

no_t_2014 <- append(no_t_2014, muni_is_dist3)

# Compare the raw and de-duplicated lengths to check for repeated codes.
length(no_t_2014)
# 28 districts
length(unique(no_t_2014))
# 28 districts










# ---------------------------------------------------
# (b) Electoral districts with municipalities spanning more than one district.

# 72 municipalities; 170 observations; 115 different districts.

# District codes (2014) that contain at least one split municipality.
split_dists2 <- c(
  2310, 2311, 2314, 2309, 201, 204, 801, 802, 804, 806, 807, 301, 302,
  3801, 3802, 4401, 4402, 3301, 3302, 3303, 3304, 3305, 3701, 3702,
  3703, 4601, 4602, 4603, 1410, 1414, 1416, 1418, 2101, 2103, 4301,
  4302, 4303, 4304, 1001, 1002, 1003, 1004, 1005, 3901, 3902, 1111,
  1112, 1113, 1114, 1106, 1107, 1108, 2502, 2504, 2202, 2203, 2204,
  2205, 2206, 2207, 2208, 3201, 3202, 1210, 1211, 1213, 1204, 1205,
  1206, 1207, 1208, 1310, 1312, 1313, 1316, 1317, 1303, 1304, 1305,
  1306, 1309, 901, 902, 904, 905, 1601, 1602, 4201, 4202, 4203, 4204,
  2001, 2002, 2901, 2902, 1501, 1502, 1503, 1504, 1505, 2811, 2812,
  3402, 3404, 3405, 3406, 2401, 2402, 2403, 2404, 405, 406, 3501, 3502, 3503
)

# De-duplicate, then prefix the election year.
split_dists <- paste0("2014_", unique(split_dists2))
length(split_dists)
# 115 districts

# Districts made up solely of split municipalities are already listed in
# missing_dists_2014, so remove them here.

# What remains are districts that mix split and non-split municipalities:
split_dists3 <- setdiff(split_dists, missing_dists_2014)
length(split_dists3)
# 99 districts

# Average number of voters in an electoral district:
mean(pre.2014$voting_pop_in_electoral_district)
# 331781.2

# If voting pop of a split municipality in 2014 in base data is less than 10% of this,
# then the district is tournament-possible and we can keep it in data:
# NB: some districts have multiple split municipalities, so I summed the number of voters 
# in split municipalities. If the number of voters in split municipalities was less than 10% of
# this, I said it was tournament-possible.

# Get the list of districts with large split municipalities to exclude from data 
new.split_dists <- split_dists3[! split_dists3 %in% c("2014_204",
                                                      "2014_302",
                                                      "2014_806",
                                                      "2014_807",
                                                      "2014_1111",
                                                      "2014_1210",
                                                      "2014_1211",
                                                      "2014_2103",
                                                      "2014_2202",
                                                      "2014_2204",
                                                      "2014_2206",
                                                      "2014_2314",
                                                      "2014_2502",
                                                      "2014_2902",
                                                      "2014_3402",
                                                      "2014_3502",
                                                      "2014_3503",
                                                      "2014_3802",
                                                      "2014_4202",
                                                      "2014_4203",
                                                      "2014_4303",
                                                      "2014_4402")]
length(new.split_dists)
# 77 districts

# NB: district 405 is miscoded.  It has mun_voting_pop=32,314, so should be in this list too.

length(no_t_2014)
# 28 districts

no_t_2014 <- c(no_t_2014, new.split_dists)
length(no_t_2014)
# 105 districts










# ---------------------------------------------------
# (c) Electoral districts composed solely of municipalities in ordinance-designated cities. 

# This requires care because when I collected the base data, I had elections data until 
# 2014, but NEEDs data only until 2012.  Thus, in the base data, muncode_num and all fiscal and demographic
# variables are not available past 2012.

# Later, I added these variables myself from Japanese government's E statistics portal
# for 2013, 2014, and 2015.  Thus, these variables exist in the master data but not in the base data.
# Also, when I collected the new variables, I only collected the ones I needed, and not all the 
# ones I had collected in the base data.

# I checked, and the 2013-2015 data uses the same rules: mun_ceif is not recorded for
# both wards in ODC and wards in Tokyo, but wards in Tokyo receive NTD (ngaid), whereas wards in ODC
# do not. So we can use the same decision rule.

# Drop rows without a municipality code, reporting counts before and after:
nrow(pre.2014)
sum(is.na(pre.2014$muncode_num))
pre.2014.new <- pre.2014[!is.na(pre.2014$muncode_num), ]
nrow(pre.2014.new)
length(unique(pre.2014.new$hor_electoral_district))


# Keep an observation when at least one of mun_ceif / mun_ngaid is recorded,
# i.e. filter out observations where mun_ceif==NA *and* mun_ngaid == NA
has_ceif_or_ngaid <- !is.na(pre.2014.new$mun_ceif) | !is.na(pre.2014.new$mun_ngaid)
pre.2014.nos <- pre.2014.new[has_ceif_or_ngaid, ]
nrow(pre.2014.new) - nrow(pre.2014.nos)
# got rid of 157 observations

# Which districts in pre.2014.new did not make it into pre.2014.nos? These are electoral districts
# comprised solely of municipalities in ordinance-designated cities.

district_survives <- pre.2014.new$hor_electoral_district %in% pre.2014.nos$hor_electoral_district
seirei.dists <- pre.2014.new[!district_survives, ]
seirei.dists.only <- unique(seirei.dists$hor_electoral_district)
length(seirei.dists.only)
# 43 districts
seirei.dists.only

# Attach the election-year prefix used by the entries of no_t_2014:
seirei.dists.only2 <- paste("2014", seirei.dists.only, sep = "_")
length(seirei.dists.only2)
# 43 districts

length(no_t_2014)
# 105 districts

no_t_2014 <- append(no_t_2014, seirei.dists.only2)

# Note that districts can appear in this list twice if they meet two conditions.
# Technically, there are 43 districts comprised solely of wards in ordinance-designated cities, 
# but some of these districts were eliminated in (b) because they have many voters
# living in split municipalities.

# Keep the first occurrence of every code:
no_t_2014 <- unique(no_t_2014)
length(no_t_2014)
# 143

# NB: Table 5.1 in book has 38 districts under ordinance-designated city districts.
# Technically, it is 43, but five fell under the former category, so have already been eliminated.
# My coding went sequentially, from "solo municipality" districts, to "split municipality" districts,
# to "ODC-districts".

















# --------------------------------------------
# CREATING A FULL LIST OF TOURNAMENT-IMPOSSIBLE DISTRICTS

# Concatenate the per-election lists in chronological order:
no_t_all <- c(no_t_1980, no_t_1983, no_t_1986, no_t_1989,
              no_t_1993, no_t_1996, no_t_2000, no_t_2003,
              no_t_2005, no_t_2009, no_t_2012, no_t_2014)


# SAVE OUT THESE DISTRICT CODES

# One-column data frame (column "no_t_all"); this CSV is read back in later in
# the script to build the tmt_possible indicator.
no_t_all.df <- data.frame(no_t_all)
write.csv(no_t_all.df, file = "no_tmts_districts2.csv")












# ----------------------------------------------
#
# GRAPHING SHARE OF ELECTORAL DISTRICTS WHERE TOURNAMENTS ARE IMPOSSIBLE 
# EVERY YEAR
# 
# FIGURE 5.7
#
# -----------------------------------------------

# Make a vector of years (most recent first). The election plotted at 1990 is
# the one whose district list is stored as no_t_1989.
years <- c(2014, 2012, 2009, 2005, 2003, 2000, 1996, 1993, 1990, 1986, 1983, 1980)

# Number of tournament-impossible districts per election, in the order of `years`:
n_impossible <- lengths(list(no_t_2014, no_t_2012, no_t_2009, no_t_2005,
                             no_t_2003, no_t_2000, no_t_1996, no_t_1993,
                             no_t_1989, no_t_1986, no_t_1983, no_t_1980))

# Denominators (one per election, same order) used to turn counts into shares:
n_districts <- c(295, 300, 300, 300, 300, 300, 300, 129, 130, 130, 130, 130)

share <- n_impossible / n_districts

par(mfrow = c(1, 1), mar = c(4, 4, 2, 1), tcl = -0.25, mgp = c(1.75, 0.6, 0),
    font.main = 1, cex.main = 2)

# Points first (x axis suppressed), then a custom axis with a tick at every
# election year, then the connecting line.
plot(years, share,
     pch = 19, col = "black", ylim = c(0, 1),
     xlab = "", ylab = "Proportion", xaxt = "n", cex.lab = 2.5)
axis(1, at = c(1980, 1983, 1986, 1990, 1993, 1996, 2000, 2003, 2005, 2009, 2012, 2014),
     cex.axis = 1.3)
lines(years, share, col = "black", lwd = 3)

# Two-line annotation plus an arrow drawn toward the line:
text(x = 2002.5, y = 0.51, labels = "Share of Electoral Districts in Which",
     cex = 2.5, xpd = NA, pos = 2, col = "black")
text(x = 2000, y = 0.45, labels = "Tournaments are Not Possible",
     cex = 2.5, xpd = NA, pos = 2, col = "black")
arrows(1990, 0.38, 1992, 0.2, lwd = 3, col = "black")

# 1300 x 1100
# saved as share_tpossible_districts





# ----------------------------------------------
#
# GRAPHING SHARE OF VOTERS SUBJECT TO A TOURNAMENT EVERY YEAR
# 
# FIGURE 5.8
#
# -----------------------------------------------


# --------------------------------------------
# Use ku_electorate to calculate share of Japanese voters in tournament-possible districts.

library(readr)

rs <- read_csv("Reed-Smith-JHRED.csv", 
               locale = locale(encoding = "SHIFT-JIS"))

# restrict to elections after 1979, no by elections, and exclude PR-only cands
rs2 <- subset(rs, rs$year > 1979 & rs$byelection == 0 & rs$kucode != 0)

# Change 1990 to 1989 so it matches with my data
table(rs2$year)
rs2$year_adj <- ifelse(rs2$year == "1990", "1989", rs2$year)
table(rs2$year_adj)

# Build the "<year>_<district code>" key used in the saved district list:
rs2$district_year <- paste(rs2$year_adj, rs2$kucode, sep = "_")

# Read in indicator variable for districts where tournaments are not possible:
notourn <- read.csv(file = "no_tmts_districts2.csv")
# 0 = district-year is on the tournament-impossible list, 1 = it is not
rs2$tmt_possible <- ifelse(rs2$district_year %in% notourn$no_t_all, 0, 1)
table(rs2$tmt_possible)
# master data stored in rs2, with tmt_possible added on

# For each election year, calculate number of voters subject to a tournament.
# rs2 has several rows per district-year; keeping only these four columns and
# dropping repeated rows leaves the distinct combinations.
rs3 <- rs2[, c("year", "district_year", "ku_electorate", "tmt_possible")]
rs4 <- rs3[!duplicated(rs3), ]
length(unique(rs4$district_year))
# 2744

# How many voters in Japan in each year?
voters_year <- tapply(rs4$ku_electorate, rs4$year, sum)
voters_year

# How many voters in Japan in each year in tournament-possible versus tournament-impossible districts ?
voters_year_tposs <- tapply(rs4$ku_electorate, list(rs4$year, rs4$tmt_possible), sum)
voters_year_tposs

# SHARE OF VOTERS SUBJECT TO A TOURNAMENT
# Select the tmt_possible == 1 column by its name instead of by position, so
# the right column is used even if only one level of tmt_possible is present.
share <- voters_year_tposs[, "1"] / voters_year
share

# tapply() returns the years in ascending order, so reverse the shares to line
# them up with the descending `years` vector.
years <- c(2014, 2012, 2009, 2005, 2003, 2000, 1996, 1993, 1990, 1986, 1983, 1980)
rev.share <- rev(share)

par(mfrow = c(1, 1), mar = c(4,4,2,1), tcl = -0.25, mgp = c(1.75, 0.6, 0),
    font.main = 1, cex.main = 2)
# NOTE(review): this call previously passed cex.lab twice (2.5 and then 2).
# Only one value can apply; the later one (2) is kept here -- confirm this is
# the size wanted for the figure.
plot(years, rev.share, pch=19, col="black", ylim=c(0,1), 
     ylab="Proportion", xlab="", xaxt="n", cex.lab=2)
axis(1, at=c(1980, 1983, 1986, 1990, 1993, 1996, 2000, 2003, 2005, 2009, 2012, 2014), cex.axis=1.2)
lines(years, rev.share, col="black", lwd =3)
text(x=1996, y=0.58, labels="Share of Voters in Japan", cex=2.5, xpd=NA, pos=2, col="black")
text(x=1995, y=0.52, labels="in Tournament-Possible", cex=2.5, xpd=NA, pos=2, col="black")
text(x=1995, y=0.46, labels="Electoral Districts", cex=2.5, xpd=NA, pos=2, col="black")
arrows(1988, 0.62, 1990, 0.84, lwd=3, col = "black")

# 1300 x 1100
# saved as share_tpossible_voters




# -------------------------------------------------------
# Not reported, but: How many people vote for LDP?

# Keep only the candidate rows of rs2 whose party is LDP:
is_ldp_row <- which(rs2$party_en == "LDP")
check <- rs2[is_ldp_row, ]

# Total votes won by those candidates in each election year:
ldpvoters_year <- tapply(check$ku_vote, check$year, sum)
ldpvoters_year
# 1980     1983     1986     1990     1993     1996     2000     2003     2005     2009     2012     2014 
# 28262408 25935882 29875496 29997599 22974558 21836085 24945804 26089326 32518388 27301980 25643306 25521136

# LDP votes divided by the voters_year totals computed for Figure 5.8.
# Can view share here, ranges from 34.9% to 22% in 1996
ldpvoters_year / voters_year








# --------------------------------------------
# Attach the indicator variable for districts where tournaments are not possible
# onto elec.dat (used above), and then merge it back into dat on the "id" variable
# and save it out:

# Read in indicator variable for districts where tournaments are not possible:
notourn <- read.csv(file = "no_tmts_districts2.csv")

# Code a new variable indicating whether an electoral district is tournament-possible or not:
# 0 when the district-year is on the saved list, 1 otherwise.
on_no_tournament_list <- elec.dat$district_year %in% notourn$no_t_all
elec.dat$tmt_possible <- as.numeric(!on_no_tournament_list)
table(elec.dat$tmt_possible)

# Strip elec.dat down to vars I need, id and new indicator var
myvars <- c("id", "tmt_possible")
elec.dat2 <- elec.dat[, myvars]

# Merge that into dat, keeping every row from both sides (all = TRUE):
dat1 <- merge(dat, elec.dat2, by = "id", all = TRUE)

saveRDS(dat1, file = "Master_plus_Snow_Turn_Trans_Dis6.rds")













# -------------------------------
#
# TABLE 5.2
# Examining support for LDP candidates and turnout in tournament-possible districts versus
# tournament-impossible districts
# 
# ---------------------------------




# --------------------------------------------------
# Read in full RS data, make district_year variable:
library(readr)
rs <- read_csv("Reed-Smith-JHRED.csv",
               locale = locale(encoding = "SHIFT-JIS"))

# Look at elections from 1980-2014, exclude by elections and PR-only cands:
in_sample <- which(rs$year > 1979 & rs$byelection == 0 & rs$kucode != 0)
rs2 <- rs[in_sample, ]
table(rs2$year)

# Relabel 1990 as 1989 before building the "<year>_<district code>" key:
rs2$year_adj <- ifelse(rs2$year == "1990", "1989", rs2$year)
table(rs2$year_adj)
rs2$district_year <- paste(rs2$year_adj, rs2$kucode, sep = "_")


# Read in indicator variable for districts where tournaments are not possible:
notourn <- read.csv(file = "no_tmts_districts2.csv")
# 0 when the district-year is on the saved list, 1 otherwise.
rs2$tmt_possible <- as.numeric(!(rs2$district_year %in% notourn$no_t_all))
table(rs2$tmt_possible)
# master data stored in rs2, with tmt_possible added on






# ----------------------------------------------------------
# Create an indicator variable for district_years without LDP candidates:
dists <- unique(rs2$district_year) # 2744

# District-years that have at least one row with party_en == "LDP".
# which() drops rows where party_en is NA, as subset() would.
ldp_dists <- unique(rs2$district_year[which(rs2$party_en == "LDP")])

# 1 = no LDP candidate in that district-year, 0 = at least one.
# A single vectorised membership test replaces the per-district loop, which
# re-subset rs2 and grew the result with c() on every iteration.
no_ldp_dists <- as.numeric(!(dists %in% ldp_dists))

dta <- data.frame(dists, no_ldp_dists)
dta2 <- subset(dta, dta$no_ldp_dists==1)

# These are district_years where there is no LDP candidate:
dta2$dists

# Attach this indicator onto master data (rs2):
rs2$no_LDP_cand <- ifelse(rs2$district_year %in% dta2$dists, 1, 0)
# master data stored in rs2, with tmt_possible and no_LDP_cand added on




# ----------------------------------------------------------
# For each district_year with an LDP candidate, calculate share of voters voting for LDP candidate
# and number of LDP candidates:
rs3 <- subset(rs2, rs2$no_LDP_cand==0)
dists <- unique(rs3$district_year) # 2636
length(dists)

# LDP candidate rows only, grouped by district_year and put in the order of
# `dists` so the results line up with it.
ldp_rows <- rs3[which(rs3$party_en == "LDP"), ]
ldp_votes <- split(ldp_rows$ku_vote, ldp_rows$district_year)[dists]
ldp_electorate <- split(ldp_rows$ku_electorate, ldp_rows$district_year)[dists]

# Number of LDP candidates per district_year:
nldpcands <- lengths(ldp_votes, use.names = FALSE)

# Summed LDP votes divided by the district electorate. vapply() insists on
# exactly one value per district_year, so a district_year whose LDP rows carry
# more than one distinct ku_electorate stops the script here.
LDP_VS <- vapply(
  seq_along(dists),
  function(i) sum(ldp_votes[[i]]) / unique(ldp_electorate[[i]]),
  numeric(1)
)

dta <- data.frame(dists, nldpcands, LDP_VS)
colnames(dta) <- c("district_year", "nldpcands", "LDP_VS")

# Attach nldpcands and LDP_VS onto master data. 
# all = TRUE keeps the district_years without an LDP candidate, which get NA
# in the two new columns.
mer <- merge(rs2, dta, by="district_year", all=TRUE)
# Edit nldpcands so it has 0 for district_years without ldp cands:
mer$nldpcands <- ifelse(mer$no_LDP_cand==1, 0, mer$nldpcands)

# master data stored in mer, with tmt_possible, no_LDP_cand, nldpcands, LDP_VS added on





# ----------------------------------------------------------

# For each district_year with an LDP candidate, create an indicator variable for districts with ldp winners:
rs3 <- subset(rs2, rs2$no_LDP_cand==0)
dists <- unique(rs3$district_year) # 2636
length(dists)

# District-years with at least one LDP row where result == 1.
# which() drops rows where the condition is NA, as subset() would.
ldp_win_dists <- rs3$district_year[which(rs3$result == 1 & rs3$party_en == "LDP")]

# 1 = some LDP candidate has result == 1 in that district_year, 0 = none.
# Vectorised replacement for the per-district loop that grew the vector with c().
ldp_winner <- as.numeric(dists %in% ldp_win_dists)

dta <- data.frame(dists, ldp_winner)
colnames(dta) <- c("district_year", "ldp_winner")

# attach new variable onto mer
mer2 <- merge(mer, dta, by="district_year", all=TRUE)

# master data stored in mer2, with tmt_possible, no_LDP_cand, nldpcands, LDP_VS, ldp_winner added on



# ---------------------------------------------------------------
# Make turnout var: total votes cast over electorate
mer2$dist_turnout <- with(mer2, ku_totvote / ku_electorate)


# ---------------------------------------------------------------------
# Make dataset for regression:

# Variables kept for the regressions:
myvars <- c("district_year", "LDP_VS", "tmt_possible", "ku_m",
            "year", "no_LDP_cand", "ldp_winner", "dist_turnout", "nldpcands")

mer3 <- mer2[, myvars]
dim(mer3)

# mer2 has several rows per district_year; dropping repeated rows of the
# selected columns should leave one row per district_year (the two counts
# below are expected to match).
mer4 <- unique(mer3)
dim(mer4)
# 2744

length(unique(mer4$district_year))
# 2744








#----------------------------------------------------------
# MODEL 1:
# 
# Do LDP candidates capture a higher share of the vote in districts where tournaments are 
# possible?
# Eyeball year by year first:

# Rows of `data` for one election year with at least one LDP candidate
# (nldpcands != 0) and the requested value of tmt_possible (1 or 0).
ldp_vs_rows <- function(data, yr, possible) {
  keep <- data$year == yr & data$nldpcands != 0 & data$tmt_possible == possible
  data[which(keep), ]
}

# For each election: mean LDP_VS in tournament-possible districts, then in
# tournament-impossible districts.
dat1996poss <- ldp_vs_rows(mer4, 1996, 1)
mean(dat1996poss$LDP_VS)
dat1996imposs <- ldp_vs_rows(mer4, 1996, 0)
mean(dat1996imposs$LDP_VS)

dat2000poss <- ldp_vs_rows(mer4, 2000, 1)
mean(dat2000poss$LDP_VS)
dat2000imposs <- ldp_vs_rows(mer4, 2000, 0)
mean(dat2000imposs$LDP_VS)

dat2003poss <- ldp_vs_rows(mer4, 2003, 1)
mean(dat2003poss$LDP_VS)
dat2003imposs <- ldp_vs_rows(mer4, 2003, 0)
mean(dat2003imposs$LDP_VS)

dat2005poss <- ldp_vs_rows(mer4, 2005, 1)
mean(dat2005poss$LDP_VS)
dat2005imposs <- ldp_vs_rows(mer4, 2005, 0)
mean(dat2005imposs$LDP_VS)
t.test(dat2005poss$LDP_VS, dat2005imposs$LDP_VS) # yes

dat2009poss <- ldp_vs_rows(mer4, 2009, 1)
mean(dat2009poss$LDP_VS)
dat2009imposs <- ldp_vs_rows(mer4, 2009, 0)
mean(dat2009imposs$LDP_VS)

dat2012poss <- ldp_vs_rows(mer4, 2012, 1)
mean(dat2012poss$LDP_VS)
dat2012imposs <- ldp_vs_rows(mer4, 2012, 0)
mean(dat2012imposs$LDP_VS)

dat2014poss <- ldp_vs_rows(mer4, 2014, 1)
mean(dat2014poss$LDP_VS)
dat2014imposs <- ldp_vs_rows(mer4, 2014, 0)
mean(dat2014imposs$LDP_VS)

# Note that LDP_VS is NA when no LDP candidate:
summary(mer4$LDP_VS)

# LDP_VS regressed on the tournament-possible indicator, with a separate
# intercept shift for every election year and ku_m as a control.
mod1 <- lm(LDP_VS ~ tmt_possible + as.factor(year) + ku_m, data = mer4)
summary(mod1)





#----------------------------------------------------------
# MODEL 2:
# 
# Is LDP more likely to win tournament_possible districts?

# Same right-hand side as Model 1; the outcome is the 0/1 ldp_winner indicator.
mod2 <- lm(ldp_winner ~ tmt_possible + as.factor(year) + ku_m, data = mer4)
summary(mod2)






#----------------------------------------------------------
# MODEL 3:
# 
# Are the districts where the LDP doesn't run a candidate more likely to be tournament-impossible
# districts?

# eyeball data:
no_cand <- mer4$no_LDP_cand == 1
dat_nolc <- mer4[which(no_cand), ]
length(unique(dat_nolc$district_year))
# 108 districts 

# of these, how many are t-possible?
dat_poss <- mer4[which(no_cand & mer4$tmt_possible == 1), ]
length(unique(dat_poss$district_year))
# 52 districts

# of these, how many are t-impossible?
dat_imposs <- mer4[which(no_cand & mer4$tmt_possible == 0), ]
length(unique(dat_imposs$district_year))
# 56 districts

# Outcome is the 0/1 no_LDP_cand indicator:
mod3 <- lm(no_LDP_cand ~ tmt_possible + as.factor(year) + ku_m, data = mer4)
summary(mod3)









# ----------------------------------------------------------
# MODEL 4:
#
# Is turnout higher in tournament_possible districts?

# Outcome is dist_turnout (ku_totvote / ku_electorate):
mod4 <- lm(dist_turnout ~ tmt_possible + as.factor(year) + ku_m, data = mer4)
summary(mod4)





# ----------------------------------------------------------
# print the table:

# All four models side by side; F statistic and adjusted R-squared are left
# out, and stars are placed at the 0.05 / 0.01 / 0.001 cutoffs.
stargazer(mod1, mod2, mod3, mod4,
          no.space = TRUE,
          omit.stat = c("f", "adj.rsq"),
          star.cutoffs = c(0.05, 0.01, 0.001))






















# -------------------------------
# FIGURE 5.1
#
# Density plot of number of municipalities per district.
#
#---------------------------------

setwd("C:/Users/ac6037/Dropbox/TARGETING/replication2018/ANALYSIS OF MASTER DATA")

options(max.print = 1000000)

# Master data with the tmt_possible indicator, as saved earlier in this script:
dat <- readRDS("Master_plus_Snow_Turn_Trans_Dis6.rds")

# Keep observations that have an HOR electoral district recorded:
elec.dat <- dat[!is.na(dat$hor_electoral_district), ]

# "num" is a count of the number of municipalities
# in each electoral district.  This was calculated after removing split municipalities.
# So, if we include them, "num" captures number of perfectly-contained municipalities 
# in each electoral district.

# Variable "last" is coded once per electoral district, so restricting master data
# to last==1 has the effect of making a district-level data set.

# Mean prior to ER
bef <- elec.dat[which(elec.dat$year < 1995 & elec.dat$last == 1), ]
table(bef$year)
mean(bef$num)
summary(bef$num)

# Mean after ER, prior to cutting number of munis
aft0 <- elec.dat[which(elec.dat$year >= 1995 & elec.dat$last == 1), ]
aft <- aft0[which(aft0$year < 2004), ]
table(aft$year)
mean(aft$num)
summary(aft$num)

# Mean after ER and after cutting number of munis
aft1 <- elec.dat[which(elec.dat$year > 2003 & elec.dat$last == 1), ]
table(aft1$year)
mean(aft1$num)
summary(aft1$num)

lmts <- c(0, 64)

avg.bef <- bef$num
avg.aft <- aft$num
avg.aft1 <- aft1$num

# Kernel density of a count vector on the range 0-65, with NAs dropped first.
num_density <- function(x) {
  density(na.omit(x), from = 0, to = 65)
}

par(mfrow = c(1, 1), mar = c(4, 4, 2, 1), tcl = -0.25, mgp = c(1.75, 0.6, 0),
    font.main = 1, cex.main = 2)

# The three curves are overlaid by plotting on top of the existing figure
# (par(new = TRUE)); every call uses the same xlim and ylim, and only the last
# call writes the axis labels.

# Before electoral reform: solid line
plot(num_density(avg.bef), xlim = c(0, 65), ylim = c(0, .12),
     xlab = "", ylab = "", main = "", lwd = 3)

par(new = TRUE, mar = c(4, 4, 2, 1), tcl = -0.25, mgp = c(1.75, 0.6, 0))

# After electoral reform, before the municipal mergers: dashed line
plot(num_density(avg.aft), xlim = c(0, 65), ylim = c(0, .12),
     xlab = "", ylab = "",
     main = "", lty = 2, col = "grey27", lwd = 3)

par(new = TRUE)

# After the municipal mergers: dotted line
plot(num_density(avg.aft1), xlim = c(0, 65), ylim = c(0, .12),
     xlab = "Number of Municipalities Per Electoral District", ylab = "Density",
     main = "", lty = 3, col = "grey46", lwd = 3, cex.lab = 1.8)

# Text-only legends (no box), one per curve, coloured to match:
legend(x = 30, y = .035, legend = "Before Electoral Reform (1980-1994)",
       bty = "n", cex = 1.5)
legend(x = 5, y = .065, legend = "After Electoral Reform, Pre-Municipal Mergers (1996-2003)",
       bty = "n", text.col = "grey27", cex = 1.5)
legend(x = 3, y = .1, legend = "After Municipal Mergers (2005-2014)",
       bty = "n", text.col = "grey46", cex = 1.5)




